home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Disc to the Future 2
/
Disc to the Future Part II Programmer's Reference (Wayzata Technology)(6013)(1992).bin
/
MAC
/
MPW_C
/
SPELL__
/
CHECK.C
next >
Wrap
Text File
|
1990-07-03
|
3KB
|
109 lines
/*
File: check.c
Author: Graham Toal
Purpose: Check a word using DAWG or TRIE.
Functions exported: dawg_check, pack_check
Internal function: dawg_ck, pack_ck
Description:
call as: dawg_check(dawg, "word")
pack_check(trie, "word")
Simple spelling-check. Unfortunately the two interfaces for
DAWGs and TRIEs are different, and even DAWG stuff differs from
program to program. The problem is primarily in how to spot
the end of a search; at the moment it is done when a particular
pointer points to the root. Unfortunately we enter the data
structure either at root+1 or at some arbitrary point, so this
scheme means passing in two pointers. The next idea is to check
that the *data* pointed to by the terminating pointer is 0; this
would be OK but I was hoping to leave the initial word in the
dawg free for expansion... (dawg contains [0, Node1..NodeN])
*best* idea would be to terminate on *INDEX* being 0. This is
probably also simplest & I'll code it up properly when I'm awake...
*/
static int
#ifdef PROTOTYPES
dawg_ck(NODE PCCRAP *dict, NODE PCCRAP *edge, char *word)
#else
dawg_ck(dict, edge, word)
NODE PCCRAP *dict;
NODE PCCRAP *edge;
char *word;
#endif
{
if (edge == dict) return(0!=0);
for (;;) {
if (*word == (((*edge >> V_LETTER) & M_LETTER))) {
if (*++word == '\0') {
return((*edge & M_END_OF_WORD) != 0);
} else {
if ((edge = dict+(*edge & M_NODE_POINTER)) == dict) break;
continue;
}
}
if (((*edge++) & M_END_OF_NODE) != 0) break;
}
return(0!=0);
}
int
#ifdef PROTOTYPES
dawg_check(NODE PCCRAP *dict, char *word)
#else
dawg_check(dict, word)
NODE PCCRAP *dict;
char *word;
#endif
{
return(dawg_ck(dict, dict+ROOT_NODE, word));
}
static int
#ifdef PROTOTYPES
pack_ck(NODE PCCRAP *ptrie, INDEX p, char *s)
#else
pack_ck(ptrie, p, s)
NODE PCCRAP *ptrie;
INDEX p;
char *s;
#endif
{
/* Note: node is passed in as a parameter so that it is in a register -
otherwise it would take an extra instruction every time it was accessed.
We trust the compiler to allocate register variables most efficiently --
when people tinker, it might improve one machine but ruin another...
*/
#define next_letter(s) \
p = ptrie[p + *s]; \
if (((p >> V_LETTER) & M_LETTER) != *s++) return(0!=0); \
if (*s == '\0') return((p >> V_END_OF_NODE) != 0); \
if ((p &= M_NODE_POINTER) == 0) return(0!=0)
/* Depending on whether your machine has a cache or not, and whether
it optimises pipeline fetches, you should decrease or increase the
number of macro instantiations in the loop below appropriately */
for (;;) {
next_letter(s); next_letter(s); next_letter(s); next_letter(s);
}
}
int
#ifdef PROTOTYPES
pack_check(NODE PCCRAP *ptrie, char *s)
#else
pack_check(ptrie, s)
NODE PCCRAP *ptrie;
char *s;
#endif
{
return(pack_ck(ptrie, 1L, s));
}